The following analysis makes use of the Palmer penguins
dataset provided as part of the weekly TidyTuesday projects. The goal
of these visualizations is to uncover the trends in body mass, culmen
(bill) length, and bill depth among the penguin species. The analysis
concludes by visualizing the location of these penguins on a map.
library(palmerpenguins)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gganimate)
library(ggdark)
library(viridis)
## Loading required package: viridisLite
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggiraph)
library(extrafont)
## Registering fonts with R
library(leaflet)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
##
## Attaching package: 'ggmap'
##
##
## The following object is masked from 'package:plotly':
##
## wind
library(tidygeocoder)
##
## Attaching package: 'tidygeocoder'
##
## The following object is masked from 'package:ggmap':
##
## geocode
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
# Preview the first rows of `penguins`, then print its column types and
# leading values.
head(penguins)
str(penguins)
## tibble [344 × 8] (S3: tbl_df/tbl/data.frame)
## $ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...
## $ year : int [1:344] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
# Run the same two inspections on the `penguins_raw` table.
head(penguins_raw)
str(penguins_raw)
## tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
## $ studyName : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
## $ Sample Number : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
## $ Species : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
## $ Region : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
## $ Island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
## $ Stage : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
## $ Individual ID : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
## $ Clutch Completion : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
## $ Date Egg : Date[1:344], format: "2007-11-11" "2007-11-11" ...
## $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ Culmen Depth (mm) : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ Body Mass (g) : num [1:344] 3750 3800 3250 NA 3450 ...
## $ Sex : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
## $ Delta 15 N (o/oo) : num [1:344] NA 8.95 8.37 NA 8.77 ...
## $ Delta 13 C (o/oo) : num [1:344] NA -24.7 -25.3 NA -25.3 ...
## $ Comments : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
## - attr(*, "spec")=
## .. cols(
## .. studyName = col_character(),
## .. `Sample Number` = col_double(),
## .. Species = col_character(),
## .. Region = col_character(),
## .. Island = col_character(),
## .. Stage = col_character(),
## .. `Individual ID` = col_character(),
## .. `Clutch Completion` = col_character(),
## .. `Date Egg` = col_date(format = ""),
## .. `Culmen Length (mm)` = col_double(),
## .. `Culmen Depth (mm)` = col_double(),
## .. `Flipper Length (mm)` = col_double(),
## .. `Body Mass (g)` = col_double(),
## .. Sex = col_character(),
## .. `Delta 15 N (o/oo)` = col_double(),
## .. `Delta 13 C (o/oo)` = col_double(),
## .. Comments = col_character()
## .. )
# Drop every row that contains an NA in any column; the static and
# interactive plots below are built from these complete records.
clean_penguins <- na.omit(penguins)
#' Dark ggplot2 theme shared by the figures in this analysis
#'
#' Starts from `dark_theme_gray()` and overrides the title, subtitle, axis,
#' legend, panel, margin and caption elements. Most text is drawn in light
#' grey.
#'
#' Note: the first call prints "Inverted geom defaults of fill and
#' color/colour"; `invert_geom_defaults()` changes them back.
#'
#' @param base_family Font family handed to `dark_theme_gray()`. Defaults to
#'   "Times New Roman", the family this analysis was written with. Pass a
#'   different family on machines where that font is not registered.
#' @return A theme object that can be added to a ggplot with `+`.
theme_custom <- function(base_family = "Times New Roman") {
  text_colour <- "lightgrey"

  dark_theme_gray(base_family = base_family) +
    theme(
      plot.title = element_text(
        face = "plain", size = 17,
        margin = margin(t = 10, b = 30), color = text_colour
      ),
      plot.subtitle = element_text(face = "italic", color = "grey"),
      axis.title = element_text(
        face = "plain", size = 12,
        margin = margin(15, 15, 15, 15), color = text_colour
      ),
      axis.text = element_text(
        size = 10, margin = margin(t = 20, r = 20), color = text_colour
      ),
      legend.title = element_text(size = 14, color = text_colour),
      legend.text = element_text(size = 12, color = text_colour),
      panel.background = element_rect(linewidth = 2),
      plot.margin = margin(10, 10, 10, 10),
      plot.caption = element_text(
        size = 8, face = "italic",
        margin = margin(5, 5, 5, 15), hjust = 0.5
      )
    )
}
The following graph is a scatter-plot of penguin flipper length (mm) and body mass (g). Two things can be observed: first, there is a positive relationship between the two, and second, the observations tend to cluster by species.
# Figure 1: scatter plot of flipper length against body mass, coloured by
# species. `tooltip` and `data_id` are consumed by the interactive point
# layer; both are keyed on the island.
# Point size is the fixed constant `size = 3` on the layer, so no `size`
# aesthetic is mapped: a mapping to `species` would be overridden by that
# constant and never reach the plot.
p.1 <- ggplot(
  clean_penguins,
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = species,
    tooltip = island,
    data_id = factor(island)
  )
) +
  geom_point_interactive(size = 3, alpha = 0.6) +
  scale_color_viridis_d(
    option = "A", begin = 0.3, end = 0.7, direction = 1,
    aesthetics = c("color", "fill")
  ) +
  theme_custom() +
  labs(
    title = "Flipper Length vs. Body Mass by Species",
    x = "Flipper Length (mm)",
    y = "Body Mass (g)",
    caption = "Figure 1: Hover over the points of the graph to see which island the cluster of penguins belongs to.",
    color = "Species"
  )
## Inverted geom defaults of fill and color/colour.
## To change them back, use invert_geom_defaults().
# Turn the static scatter plot into an interactive widget. The options set the
# hover CSS (black fill), a maximum zoom of 2 and a sizing width of 1.
p.i.1 <- girafe(
  ggobj = p.1,
  options = list(
    opts_hover(css = "fill:black;"),
    opts_zoom(max = 2),
    opts_sizing(width = 1)
  )
)
# Auto-print the widget so it is rendered in the document.
p.i.1
The following graph is a hex-plot showing the relationship between penguin bill length (mm) and bill depth (mm). Once again, a positive relationship and clustering by species are observed.
# Figure 2: hex-binned counts of bill length against bill depth, one panel per
# species. `geom_hex()` shades each bin by its count through the continuous
# `fill` scale, so the viridis palette is attached there. No colour aesthetic
# is mapped in this plot, so a discrete colour scale would have no effect and
# would leave the default fill gradient in place.
ggplot(clean_penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_hex() +
  scale_fill_viridis_c(option = "A", begin = 0.3, end = 0.7, direction = 1) +
  facet_wrap(~species) +
  theme_custom() +
  labs(
    title = "Distribution of Penguin Bill Length and Depth by Species",
    x = "Bill Length (mm)",
    y = "Bill Depth (mm)"
  )
The following graph shows the distribution of body mass (g) in each species by sex. Two things can be observed from the graph below. First, males generally weigh more than females regardless of species. Second, the difference between male and female weight seems to be roughly constant across species.
# Figure 3: body mass by sex, one panel per species. Box plots are drawn
# first and the individual observations are jittered on top; the jitter seed
# is fixed so the point positions are the same on every render.
p.3 <- ggplot(clean_penguins, aes(x = sex, y = body_mass_g, fill = sex)) +
  geom_boxplot() +
  geom_jitter(
    position = position_jitter(width = 0.2, seed = 0),
    alpha = 0.5,
    show.legend = FALSE
  ) +
  facet_wrap(~species) +
  scale_fill_viridis_d(option = "B", begin = 0.3, end = 0.7, direction = 1) +
  labs(
    title = "Body Mass Distribution by Sex and Species",
    x = "",
    y = "Body Mass (g)",
    fill = "Sex"
  ) +
  theme_custom() +
  # Sex is already encoded by the fill legend, so the x-axis labels and ticks
  # are removed. This must come after theme_custom() to take effect.
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

# Convert to a plotly widget and auto-print it.
p.i.3 <- ggplotly(p = p.3)
p.i.3
The graph below is another illustration of the relationship between bill depth (mm) and length (mm). The density plot may prove to be more useful for visual analysis in some cases as it clearly shows the values around which each species is centered and clustering amongst species.
# Figure 4: 2D density contours of bill length against bill depth, with one
# set of contour lines per species (colour).
# NOTE(review): `fill = after_stat(level)` is mapped globally although the
# only layer draws contour lines; presumably it is kept so the level reaches
# the plotly hover text. Confirm before removing.
p.4 <- ggplot(
  clean_penguins,
  aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    fill = after_stat(level)
  )
) +
  geom_density_2d(aes(color = species)) +
  scale_color_viridis_d(option = "B", begin = 0.3, end = 0.7, direction = 1) +
  theme_custom() +
  labs(
    title = "Density Contour Plot of Bill Length vs. Depth by Species",
    x = "Bill Length (mm)",
    y = "Bill Depth (mm)",
    caption = "Figure 4: Hover over the plot to display specific data point values",
    color = "Species"
  )

# Convert to a plotly widget and auto-print it.
p.i.4 <- ggplotly(p = p.4)
p.i.4
The following graph is a bar graph of the size of each species’ population by island. It is important to note that some islands have more than one species.
# Figure 5: number of penguins of each species, one panel per island.
# `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored); it is
# supplied only so that ggplotly() can show it as the hover tooltip.
# The bar height is referenced with after_stat(count); the older `..count..`
# dot-dot notation has been deprecated since ggplot2 3.4.0.
p.5 <- ggplot(clean_penguins, aes(x = factor(species))) +
  geom_bar(
    aes(fill = species, text = paste("Frequency:", after_stat(count)))
  ) +
  labs(
    title = "Frequency of Each Species of Penguin by Island",
    y = "Count",
    fill = "Species"
  ) +
  # Each island only shows the species actually present on it.
  facet_wrap(~island, scales = "free_x") +
  theme_custom() +
  scale_fill_viridis_d(option = "G", begin = 0.3, end = 0.7, direction = 1)
## Warning in geom_bar(aes(fill = species, text = paste("Frequency:",
## ..count..))): Ignoring unknown aesthetics: text
# Species are identified by the fill legend, so the x-axis title, labels and
# ticks are blanked before the plot is converted to plotly.
p.5 <- p.5 +
  theme(
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )
# Restrict the hover tooltip to the custom `text` aesthetic.
p.i.5 <- ggplotly(p = p.5, tooltip = "text")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## ℹ The deprecated feature was likely used in the base package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p.i.5
The following animation shows the evolution of individual bill measurements by species from 2007 until 2009. Slight variation can be seen from year to year, but the clustering by species remains relatively constant over time.
# Keep only the rows that have both bill measurements, a species and a year;
# unlike `clean_penguins`, rows with other missing fields are retained.
penguins_animation <- drop_na(
  penguins,
  bill_length_mm, bill_depth_mm, species, year
)

# Scatter plot of bill length against bill depth that transitions through the
# years; the title shows the time of the current frame.
p <- ggplot(
  penguins_animation,
  aes(x = bill_length_mm, y = bill_depth_mm, color = species)
) +
  geom_point(size = 3, alpha = 0.6) +
  scale_color_viridis_d(option = "D", begin = 0.3, end = 0.7, direction = 1) +
  labs(
    title = "Year: {frame_time}",
    x = "Bill Length (mm)",
    y = "Bill Depth (mm)",
    caption = "The Evolution of Individual Bill Measurements in Species Across Time (2007-09)",
    color = "Species"
  ) +
  theme_custom() +
  theme(legend.position = "right") +
  transition_time(year) +
  ease_aes("linear")

# Render the animation as a GIF.
animate(
  p,
  renderer = gifski_renderer(),
  width = 800,
  height = 600,
  duration = 10,
  end_pause = 50
)
# No animation object is passed, so this saves the most recently rendered one.
anim_save("penguin_individual_bill_measurements_viridis_darkmode.gif")
The figure below shows the locations of the previously discussed islands on a world map. Hover over the points to view more information about them.
# One row per distinct island name, as plain character strings.
islands_df <- data.frame(
  island = as.character(unique(clean_penguins[["island"]]))
)
# Look up coordinates for each island name through the OSM (Nominatim)
# geocoder; results are stored in the `latitude` and `longitude` columns.
# NOTE(review): the query is the bare island name only; confirm that the
# returned coordinates are the intended islands.
islands_geocoded <- geocode(
  islands_df,
  address = "island",
  method = "osm",
  lat = latitude,
  long = longitude
)
## Passing 3 addresses to the Nominatim single address geocoder
## Query completed in: 3 seconds
# data.table copies of the geocoded islands and of the cleaned penguin data.
islands <- as.data.table(islands_geocoded)
clean_penguins_dt <- as.data.table(clean_penguins)

# Attach the coordinates to every penguin record (all penguin rows are kept).
clean_penguins_geocoded <- merge(
  x = clean_penguins_dt,
  y = islands_geocoded,
  by = "island",
  all.x = TRUE
)

# Number of penguin records per island.
island_counts <- summarise(
  group_by(clean_penguins_geocoded, island),
  Count = n()
)

# Add the per-island count to the island coordinate table used for the map.
islands <- left_join(islands, island_counts, by = "island")
# Bounding box covering (almost) the full world.
bbox <- c(
  left = -170,
  bottom = -80,
  right = 170,
  top = 80
) # Full world coordinates
# Download the dark Stadia basemap tiles for that box at zoom level 4.
map <- get_stadiamap(
  bbox = bbox,
  zoom = 4,
  maptype = "alidade_smooth_dark"
)
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
## ℹ 224 tiles needed, this may take a while (try a smaller zoom?)
# Map figure: one point per island on the basemap, sized by the number of
# penguins recorded there and coloured by island.
# Two ordering/compatibility details matter here:
#   * theme_grey() is a complete theme, so it must be added BEFORE the
#     theme() tweaks; added afterwards it replaces them and the legend and
#     title/axis settings are silently lost.
#   * A scale's guide is switched off with guide = "none"; guide = FALSE has
#     been deprecated since ggplot2 3.3.4.
# Note: the object is named `ggmap`, the same as the function it is built
# with. The call on the right-hand side still resolves to the function.
ggmap <- ggmap(map) +
  geom_point(
    data = islands,
    aes(longitude, latitude, size = Count, color = island),
    alpha = 0.7
  ) +
  scale_size_area(max_size = 7, guide = "none") +
  scale_color_viridis_d(option = "G", begin = 0.3, end = 0.7, direction = 1) +
  labs(
    title = "Population Size of Penguins on Each Island",
    x = "Longitude",
    y = "Latitude",
    color = "Island"
  ) +
  theme_grey() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      face = "plain", size = 17, margin = margin(t = 10, b = 30)
    ),
    axis.title = element_text(
      face = "plain", size = 12, margin = margin(15, 15, 15, 15)
    )
  )
# Convert the map to a plotly widget so the points can be hovered.
ggmap.i <- ggplotly(ggmap)
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the base package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggmap.i